import jieba
from gxl_ai_utils.utils import utils_file
import tqdm

# Segment every line of a text list into words with jieba and save the
# space-separated result to a new list file.

# Source text list, destination list, and the custom word list for jieba.
input_text_path = "output_data/text_nospace_ABC_nospecial_nolong.list"
output_text_path = "output_data/text_nospace_ABC_nospecial_nolong_wordseg2.list"
word_path = "/home/work_nfs8/xlgeng/new_workspace/wenet_gxl_en_cn_2/examples/aishell/s0/data_input/wordlist"

# The user dictionary has to be registered before the first cut() call so
# that its entries take part in the segmentation.
jieba.load_userdict(word_path)

# NOTE(review): GxlTimer presumably starts measuring on construction, so it
# is created before the input is loaded -- confirm against gxl_ai_utils.
timer = utils_file.GxlTimer()
text_lines = utils_file.load_list_file_clean(input_text_path)

# jieba.cut yields the tokens of one line; they are joined with single
# spaces. tqdm only wraps the iteration to display progress.
res_list = [
    " ".join(jieba.cut(raw_line))
    for raw_line in tqdm.tqdm(text_lines, total=len(text_lines))
]

utils_file.write_list_to_file(res_list, output_text_path)
timer.stop_halfway()
